#------------------------------------------------------------------------------
# Import Libraries 
#------------------------------------------------------------------------------

# Run on Computer
rm(list = ls())
library(tidyverse)
library(PanelMatch)
library(data.table)
library("zoo")
library("tictoc")
library('fst')
library("fixest")
library("PanelMatch") 
library("patchwork")
library("rio")
library("magrittr") 
library("janitor")
library('did')
library("panelView")
library("vtable")
library("RPushbullet")

#------------------------------------------------------------------------------



#------------------------------------------------------------------------------
# Define Paths
#------------------------------------------------------------------------------

# R studio
setwd( dirname(rstudioapi::getActiveDocumentContext()$path) )
# R default : unccoment if you use default R
# setwd(getSrcDirectory(function(){})[1])
#------------------------------------------------------------------------------




# Comment: Please note that the following code consists of three steps. 
# Step 1: Generates the matching models.
# Step 2: Generates inputs needed for the plots.
# Step 3: Generates the plots.
# First two steps takes extensive time to run. 
# To facilitate the generation of figures in step 3 we have saved 
# the output from step 2 in th replication folder. 



#-------------------------------------------------------------------------------
# Step 1
#-------------------------------------------------------------------------------

# Comment: Step 1 - Running the matching models and saving the final outputs.
# This step involves executing matching models, which are known to be time-consuming.
# Each model takes approximately 12 hours to complete on the cluster.


# Preparing data to panelmatch

df_final <- fread("vcf_data_complete.csv")
df_final <- df_final %>%
  filter( year >= 1995 )
ex_ante_med = quantile(df_final$cover_1990, 0.5)
df_final <- df_final %>%
  filter( cover_1990 > ex_ante_med ) %>%
  as.data.frame()

names(df_final)
cols_use_match <- c("cellid", "year","nl_p95",
                    "D", "num_state", "forest_index", 
                    "con_Mining50",  "con_Mining50_4",   
                    "con_Mining50_3", "con_Mining50_2", 
                    "con_Mining50_1" )

df_match <- df_final[, cols_use_match ]
unique(df_final$state)
exact_match_vars <- c("num_state", "con_Mining50_4",   
                      "con_Mining50_3", "con_Mining50_2", 
                      "con_Mining50_1")


# Matching con_Mining50 Using CBPSCORE 4 Years
match_ps_vcf_con_Mining50 = PanelMatch(
  lag = 4, time.id = "year", unit.id = "cellid",
  treatment = "D", refinement.method = "ps.weight",
  data = df_match,
  covs.formula = ~ I(lag(forest_index, 1:4)) 
  + I(lag(nl_p95, 1:4))  ,
  exact.match.variables = exact_match_vars,
  outcome.var = "con_Mining50",
  size.match = 5, qoi = "att",
  match.missing = FALSE, listwise.delete = TRUE,
  lead = 0:4, forbid.treatment.reversal = F,
  use.diagonal.variance.matrix = T
)

save(match_ps_vcf_con_Mining50,
     "appendix_figureA7_step1_ps.RData" )


# Matching con_Mining50 Using CBPSCORE 4 Years

match_cbps_vcf_con_Mining50 = PanelMatch(
  lag = 4, time.id = "year", unit.id = "cellid",
  treatment = "D", refinement.method = "CBPS.weight",
  data = df_match,
  covs.formula = ~ I(lag(forest_index, 1:4)) 
  + I(lag(nl_p95, 1:4))  ,
  exact.match.variables = exact_match_vars,
  outcome.var = "con_Mining50",
  size.match = 5, qoi = "att",
  match.missing = FALSE, listwise.delete = TRUE,
  lead = 0:4, forbid.treatment.reversal = F,
  use.diagonal.variance.matrix = T
)

save(match_cbps_vcf_con_Mining50,
     file = file.path( "appendix_figureA7_step1_cbps.RData" ) )

#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
# Step 2
#-------------------------------------------------------------------------------

# Comment: Step 2 - Performing operations on the outputs from Step 1.
# Once the matching models have finished running and the final outputs are saved,
# this step involves performing various operations on those outputs to
# generate the required inputs for the plot.


load( "appendix_figureA7_step1_cbps.RData" )
load( "appendix_figureA7_step1_ps.RData" )

# Generate Objects
ps_results_vcf = PanelEstimate(sets = match_ps_vcf_con_Mining50,
                               data = df_match,
                               number.iterations = 1000)
cbps_results_vcf = PanelEstimate(sets = match_cbps_vcf_con_Mining50,
                                 data = df_match,
                                 number.iterations = 1000 )
# Get Variables
matchrestable = \(x, v) data.frame(v, with(x, cbind(lead, estimates, standard.error)))
variables_vec <- c("0con_Mining50" )
results_list <- list( list( ps_results_vcf,
                            cbps_results_vcf ) )

length(results_list)
datalist = list()
for (i in 1:length(variables_vec)) {
  print(i)
  matchEst = rbind(
    matchrestable(results_list[[i]][[1]], "1Pscore"),
    matchrestable(results_list[[i]][[2]], "2CBPS")
  )
  matchEst$variable <- variables_vec[i]
  datalist[[i]] <- matchEst
}
df_aux1 <- bind_rows(datalist, .id = "column_label")  %>% filter( lead > -1 )

df_aux1$variable <- factor(df_aux1$variable, levels = c("0con_Mining50" ),
                           labels = c( "Mining" ) )


# Save objects for plot
save( ps_results_vcf, 
      cbps_results_vcf, 
      df_aux1, 
      file = "appendix_figureA7_input_step2.RData" )


#------------------------------------------------------------------------------





#-------------------------------------------------------------------------------
# Step 3
#-------------------------------------------------------------------------------

load("appendix_figureA7_input_step2.RData")

### Only Mining50
(f2 = ggplot(df_aux1 %>% filter( variable == "Mining" ) ,
             aes(x= lead,
                 y = estimates,
                 colour = v,
                 shape = v ) ) +
    geom_pointrange(position = position_dodge2(width = .2),
                    aes(ymin = estimates - 1.96 * `standard.error`,
                        ymax = estimates + 1.96 * `standard.error`),
                    alpha = 1) +
    scale_color_manual( values = c("red", "black"),
                        labels = c( "Propensity Score", 
                                    "Covariate Balancing Propensity Score" ) ) +
    scale_shape_manual( values=c( 1, 4 ),
                        guide = "none" ) +
    guides( color = guide_legend( override.aes = list( shape = c(1, 4 )))) +
    geom_hline( yintercept = 0, linetype = 'dotted' ) +
    scale_color_manual( values = c("red", "black"),
                        labels = c( "Propensity Score", 
                                    "Covariate Balancing Propensity Score" ) ) +
    labs(y = "Treatment Effect on \nForest Index",
         x = "Years since PESA",
         colour = "") +
    theme(panel.grid.major = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          legend.position = "bottom", 
          text = element_text( size = 28 )
    )
)

ggsave( "appendix_figureA7.png", 
        f2, device = png, width = 10, 
        height = 6, dpi = 150, units = "in")

#------------------------------------------------------------------------------